Downloading Programs

Download R
Download R Studio

Resources

R Studio Cheat Sheets
R Markdown Introduction
R Markdown Gallery

Chunk #2–Opening Packages

# This is to be run every time you use this R Markdown document
library(dplyr) # Data Manipulation
library(knitr) # R Markdown files
library(readr) # Reading data into R
library(tidyr) # Data Transformation

Chunk #3–Setting the Working Directory

# Show the folder R is currently reading from and writing to
getwd()
## [1] "C:/Users/alwalker/Desktop/NEAIR Presentation"
# To find the path of the folder you want to work in:
# 1 Open Windows Explorer
# 2 Navigate to the C Drive
# 3 Select "Users", then your own user name
# 4 Select Desktop, then the folder that holds your files
# 5 Copy the Address **BE SURE TO REPLACE EVERY "\" WITH "/"
# Point R at that folder; replace the path below with your own
setwd("c:/Users/alwalker/Desktop/NEAIR Presentation")

Chunk #4–Naming Objects

# Format of an R object: name <- value
# A character (text) object
title <- "You_R_What_You_Code"

# A numeric object
year <- 2020

Chunk #5–Reading in Data–There are many ways to do this

Method 1–With or Without Setting the Working Directory

# This file could be read into R without setting a working directory, but you would have to give the full path (run without the #):
# degrees <- read_csv("c:/Users/alwalker/Desktop/NEAIR Presentation/NEAIR_IPEDS_Sample_Data_Set_Degrees.csv")
# You do not need that line if you DID set a working directory; the shorter call below reads the same file.

# Reading data in (readr package)
# Because you set a working directory, all you have to do is refer to the file by name when reading it into R.
degrees <- read_csv("NEAIR_IPEDS_Sample_Data_Set_Degrees.csv")

# Remove the example objects "title" and "year" from the environment; they are no longer needed
rm(title, year)

#### Method 2--Through R Studio's Import Data Function (refer to NEAIR's video to view)

#### Method 3--Code for getting data straight from the Internet
# read.csv() is base R (not the readr package used in Method 1).
# NOTE(review): by default read.csv() turns spaces and punctuation in column headers into periods,
# which appears to be why the column names printed in the next chunk contain dots.
# Running this line replaces any "degrees" object already created by Method 1.
degrees <- read.csv(url("https://raw.githubusercontent.com/annlaurawalker/NEAIR_R-R_Session/master/NEAIR_IPEDS_Sample_Data_Set_Degrees.csv"))

Seeing, Renaming, and Rearranging Column Names

Chunk #6–Seeing a List of Column Names

# Print the name of every column in the data frame, in order
colnames(degrees)
##  [1] "unitid"                                                                                
##  [2] "institution.name"                                                                      
##  [3] "year"                                                                                  
##  [4] "HD2019.State.abbreviation"                                                             
##  [5] "HD2019.Sector.of.institution"                                                          
##  [6] "HD2019.Historically.Black.College.or.University"                                       
##  [7] "DRVC2019.Associate.s.degree"                                                           
##  [8] "DRVC2019.Bachelor.s.degree"                                                            
##  [9] "DRVC2019.Master.s.degree"                                                              
## [10] "DRVC2019.Doctor.s.degree...research.scholarship"                                       
## [11] "DRVC2019.Doctor.s.degree...professional.practice"                                      
## [12] "DRVC2019.Doctor.s.degree...other"                                                      
## [13] "DRVC2019.Certificates.of.less.than.1.year"                                             
## [14] "DRVC2019.Certificates.of.1.but.less.than.2.years"                                      
## [15] "DRVC2019.Certificates.of.2.but.less.than.4.years"                                      
## [16] "DRVC2019.Postbaccalaureate.certificates"                                               
## [17] "DRVC2019.Post.master.s.certificates"                                                   
## [18] "DRVC2019.Number.of.students.receiving.an.Associate.s.degree"                           
## [19] "DRVC2019.Number.of.students.receiving.a.Bachelor.s.degree"                             
## [20] "DRVC2019.Number.of.students.receiving.a.Master.s.degree"                               
## [21] "DRVC2019.Number.of.students.receiving.a.Doctor.s.degree"                               
## [22] "DRVC2019.Number.of.students.receiving.a.certificate.of.less.than.1.year"               
## [23] "DRVC2019.Number.of.students.receiving.a.certificate.of.1.but.less.than.4.years"        
## [24] "DRVC2019.Number.of.students.receiving.a.Postbaccalaureate.or.Post.master.s.certificate"
# Column names, as you can see, are long, and the blanks and punctuation from the original headers now appear as periods. Code in the next chunk fixes both problems:

Chunk #7–Renaming Column Names

# Replace all 24 column names with short snake_case names.
# The new names are assigned by POSITION, so they must be listed in exactly
# the same order as the existing columns.
colnames(degrees) <- c(
  "unitid", "inst_name", "year", "state", "inst_sector", "hbcu",
  "count_assoc_degrees", "count_bach_degrees", "count_mast_degrees",
  "count_phd_rs_degrees", "count_phd_pp_degrees", "count_phd_other_degrees",
  "count_certs_less_yr", "count_certs_1_2_yr", "count_certs_2_4_yr",
  "count_certs_post_bach", "count_certs_post_mast",
  "count_assoc_students", "count_bach_students", "count_mast_students",
  "count_phd_students", "count_certs_less_yr_students",
  "count_certs_1_4_yr_students", "count_certs_post_bach_mast_students"
)

# Check the result
colnames(degrees)
##  [1] "unitid"                              "inst_name"                          
##  [3] "year"                                "state"                              
##  [5] "inst_sector"                         "hbcu"                               
##  [7] "count_assoc_degrees"                 "count_bach_degrees"                 
##  [9] "count_mast_degrees"                  "count_phd_rs_degrees"               
## [11] "count_phd_pp_degrees"                "count_phd_other_degrees"            
## [13] "count_certs_less_yr"                 "count_certs_1_2_yr"                 
## [15] "count_certs_2_4_yr"                  "count_certs_post_bach"              
## [17] "count_certs_post_mast"               "count_assoc_students"               
## [19] "count_bach_students"                 "count_mast_students"                
## [21] "count_phd_students"                  "count_certs_less_yr_students"       
## [23] "count_certs_1_4_yr_students"         "count_certs_post_bach_mast_students"

Chunk #8–Rearranging Columns

# The code below uses the dplyr package.

# "%>%" is called a pipe. Read it as "then": it hands the result on its left
# to the function on its right.
# In prose, the first statement below means:
#   Create a new data frame named "degrees_arranged" by
#   taking the data frame "degrees" THEN
#   selecting these fields, in this order.

degrees_arranged <- degrees %>%
  select(
    unitid, inst_name, year,
    count_assoc_degrees, count_assoc_students,
    count_bach_degrees, count_bach_students,
    count_mast_degrees, count_mast_students,
    count_phd_rs_degrees, count_phd_pp_degrees, count_phd_other_degrees,
    count_phd_students,
    count_certs_less_yr, count_certs_less_yr_students,
    count_certs_1_2_yr, count_certs_2_4_yr, count_certs_1_4_yr_students,
    count_certs_post_bach, count_certs_post_mast,
    count_certs_post_bach_mast_students
  )

# Note both the new data frame in the environment AND its number of variables
# (21 instead of 24)!
# The select() above left out these variables: inst_sector, state, hbcu.
# Sometimes you will want to drop variables; other times you will want to keep
# them all, as in the statement below, which reorders "degrees" itself and
# keeps all 24 columns.

degrees <- degrees %>%
  select(
    year, state, unitid, inst_name, hbcu, inst_sector,
    count_assoc_degrees, count_assoc_students,
    count_bach_degrees, count_bach_students,
    count_mast_degrees, count_mast_students,
    count_phd_rs_degrees, count_phd_pp_degrees, count_phd_other_degrees,
    count_phd_students,
    count_certs_less_yr, count_certs_less_yr_students,
    count_certs_1_2_yr, count_certs_2_4_yr, count_certs_1_4_yr_students,
    count_certs_post_bach, count_certs_post_mast,
    count_certs_post_bach_mast_students
  )

# rm() removes the listed objects from the environment
rm(degrees_arranged)

Chunk #9–Head of a Data Frame

# With no second argument, head() prints the first six rows
head(degrees)
##   year        state unitid                             inst_name hbcu
## 1 2019     Maryland 491631   Women's Institute of Torah Seminary   No
## 2 2019     Virginia 234155             Virginia State University  Yes
## 3 2019     Virginia 231624                        William & Mary   No
## 4 2019     Maryland 163912        St. Mary's College of Maryland   No
## 5 2019         Ohio 200590                 ETI Technical College   No
## 6 2019 Rhode Island 479062 MotoRing Technical Training Institute   No
##                               inst_sector count_assoc_degrees
## 1 Private not-for-profit, 4-year or above                   0
## 2                 Public, 4-year or above                   0
## 3                 Public, 4-year or above                   0
## 4                 Public, 4-year or above                   0
## 5              Private for-profit, 2-year                  11
## 6    Private for-profit, less-than 2-year                   0
##   count_assoc_students count_bach_degrees count_bach_students
## 1                    0                 45                  45
## 2                    0                672                 672
## 3                    0               1653                1653
## 4                    0                392                 392
## 5                   11                 NA                  NA
## 6                    0                 NA                  NA
##   count_mast_degrees count_mast_students count_phd_rs_degrees
## 1                  0                   0                    0
## 2                113                 113                   19
## 3                737                 734                   85
## 4                 29                  29                    0
## 5                 NA                  NA                   NA
## 6                 NA                  NA                   NA
##   count_phd_pp_degrees count_phd_other_degrees count_phd_students
## 1                    0                       0                  0
## 2                    0                       0                 19
## 3                  230                       0                315
## 4                    0                       0                  0
## 5                   NA                      NA                 NA
## 6                   NA                      NA                 NA
##   count_certs_less_yr count_certs_less_yr_students count_certs_1_2_yr
## 1                   0                            0                  0
## 2                   0                            0                  0
## 3                   0                            0                  0
## 4                   0                            0                  0
## 5                   0                            0                 91
## 6                   0                            0                 43
##   count_certs_2_4_yr count_certs_1_4_yr_students count_certs_post_bach
## 1                  0                           0                     0
## 2                  0                           0                    20
## 3                  0                           0                     9
## 4                  0                           0                     0
## 5                  0                          91                    NA
## 6                  0                          43                    NA
##   count_certs_post_mast count_certs_post_bach_mast_students
## 1                     0                                   0
## 2                     0                                  20
## 3                     7                                  16
## 4                     0                                   0
## 5                    NA                                  NA
## 6                    NA                                  NA

Chunk #10–Viewing the nth Row of Data

# Example: How to see the 10th row of data.
# Inside the brackets, the row goes before the comma and the columns after it;
# leaving the column part blank returns every column.
degrees[10,]
##    year state unitid               inst_name hbcu                inst_sector
## 10 2019  Ohio 457891 Elevate Salon Institute   No Private for-profit, 2-year
##    count_assoc_degrees count_assoc_students count_bach_degrees
## 10                  NA                   NA                 NA
##    count_bach_students count_mast_degrees count_mast_students
## 10                  NA                 NA                  NA
##    count_phd_rs_degrees count_phd_pp_degrees count_phd_other_degrees
## 10                   NA                   NA                      NA
##    count_phd_students count_certs_less_yr count_certs_less_yr_students
## 10                 NA                  NA                           NA
##    count_certs_1_2_yr count_certs_2_4_yr count_certs_1_4_yr_students
## 10                 NA                 NA                          NA
##    count_certs_post_bach count_certs_post_mast
## 10                    NA                    NA
##    count_certs_post_bach_mast_students
## 10                                  NA

Chunk #11–First Twenty-Five Rows of a Dataframe

# The second argument sets how many rows to print from the top
head(degrees, 25)
##    year         state unitid                               inst_name hbcu
## 1  2019      Maryland 491631     Women's Institute of Torah Seminary   No
## 2  2019      Virginia 234155               Virginia State University  Yes
## 3  2019      Virginia 231624                          William & Mary   No
## 4  2019      Maryland 163912          St. Mary's College of Maryland   No
## 5  2019          Ohio 200590                   ETI Technical College   No
## 6  2019  Rhode Island 479062   MotoRing Technical Training Institute   No
## 7  2019      Maryland 444291       Empire Beauty School-Owings Mills   No
## 8  2019      Delaware 450298             Strayer University-Delaware   No
## 9  2019      Virginia 232919                   Tidewater Tech-Trades   No
## 10 2019          Ohio 457891                 Elevate Salon Institute   No
## 11 2019      Maryland 162609                         Garrett College   No
## 12 2019          Ohio 203535                          Kenyon College   No
## 13 2019 Massachusetts 166124               College of the Holy Cross   No
## 14 2019         Maine 160940       Purdue University Global-Lewiston   No
## 15 2019      Maryland 162283                 Coppin State University  Yes
## 16 2019      Maryland 162168                      Chesapeake College   No
## 17 2019   Connecticut 455798       Oxford Academy of Hair Design Inc   No
## 18 2019          Ohio 202453                   Dayton Barber College   No
## 19 2019   Connecticut 129923 Lincoln Technical Institute-New Britain   No
## 20 2019          Ohio 201867  Cincinnati College of Mortuary Science   No
## 21 2019 Massachusetts 164474       Andover Newton Theological School   No
## 22 2019          Ohio 493521                     Global Tech College   No
## 23 2019      Virginia 232265                      Hampton University  Yes
## 24 2019         Maine 161518         Saint Joseph's College of Maine   No
## 25 2019 West Virginia 377652                  Valley College-Beckley   No
##                                inst_sector count_assoc_degrees
## 1  Private not-for-profit, 4-year or above                   0
## 2                  Public, 4-year or above                   0
## 3                  Public, 4-year or above                   0
## 4                  Public, 4-year or above                   0
## 5               Private for-profit, 2-year                  11
## 6     Private for-profit, less-than 2-year                   0
## 7     Private for-profit, less-than 2-year                   0
## 8      Private for-profit, 4-year or above                  13
## 9     Private for-profit, less-than 2-year                   0
## 10              Private for-profit, 2-year                  NA
## 11                          Public, 2-year                 102
## 12 Private not-for-profit, 4-year or above                   0
## 13 Private not-for-profit, 4-year or above                   0
## 14                 Public, 4-year or above                  NA
## 15                 Public, 4-year or above                   0
## 16                          Public, 2-year                 243
## 17    Private for-profit, less-than 2-year                   0
## 18              Private for-profit, 2-year                   0
## 19    Private for-profit, less-than 2-year                   0
## 20 Private not-for-profit, 4-year or above                  66
## 21 Private not-for-profit, 4-year or above                  NA
## 22              Private for-profit, 2-year                   0
## 23 Private not-for-profit, 4-year or above                   0
## 24 Private not-for-profit, 4-year or above                   4
## 25    Private for-profit, less-than 2-year                   0
##    count_assoc_students count_bach_degrees count_bach_students
## 1                     0                 45                  45
## 2                     0                672                 672
## 3                     0               1653                1653
## 4                     0                392                 392
## 5                    11                 NA                  NA
## 6                     0                 NA                  NA
## 7                     0                 NA                  NA
## 8                    13                 31                  31
## 9                     0                 NA                  NA
## 10                   NA                 NA                  NA
## 11                  102                 NA                  NA
## 12                    0                434                 434
## 13                    0                698                 698
## 14                   NA                 NA                  NA
## 15                    0                378                 378
## 16                  243                 NA                  NA
## 17                    0                 NA                  NA
## 18                    0                 NA                  NA
## 19                    0                 NA                  NA
## 20                   66                 48                  48
## 21                   NA                 NA                  NA
## 22                    0                 NA                  NA
## 23                    0                640                 640
## 24                    4                311                 311
## 25                    0                 NA                  NA
##    count_mast_degrees count_mast_students count_phd_rs_degrees
## 1                   0                   0                    0
## 2                 113                 113                   19
## 3                 737                 734                   85
## 4                  29                  29                    0
## 5                  NA                  NA                   NA
## 6                  NA                  NA                   NA
## 7                  NA                  NA                   NA
## 8                  12                  12                    0
## 9                  NA                  NA                   NA
## 10                 NA                  NA                   NA
## 11                 NA                  NA                   NA
## 12                  0                   0                    0
## 13                  0                   0                    0
## 14                 NA                  NA                   NA
## 15                 66                  66                    0
## 16                 NA                  NA                   NA
## 17                 NA                  NA                   NA
## 18                 NA                  NA                   NA
## 19                 NA                  NA                   NA
## 20                  0                   0                    0
## 21                 NA                  NA                   NA
## 22                 NA                  NA                   NA
## 23                141                 141                   14
## 24                196                 196                    0
## 25                 NA                  NA                   NA
##    count_phd_pp_degrees count_phd_other_degrees count_phd_students
## 1                     0                       0                  0
## 2                     0                       0                 19
## 3                   230                       0                315
## 4                     0                       0                  0
## 5                    NA                      NA                 NA
## 6                    NA                      NA                 NA
## 7                    NA                      NA                 NA
## 8                     0                       0                  0
## 9                    NA                      NA                 NA
## 10                   NA                      NA                 NA
## 11                   NA                      NA                 NA
## 12                    0                       0                  0
## 13                    0                       0                  0
## 14                   NA                      NA                 NA
## 15                    4                       0                  4
## 16                   NA                      NA                 NA
## 17                   NA                      NA                 NA
## 18                   NA                      NA                 NA
## 19                   NA                      NA                 NA
## 20                    0                       0                  0
## 21                   NA                      NA                 NA
## 22                   NA                      NA                 NA
## 23                   82                       0                 96
## 24                    0                       0                  0
## 25                   NA                      NA                 NA
##    count_certs_less_yr count_certs_less_yr_students count_certs_1_2_yr
## 1                    0                            0                  0
## 2                    0                            0                  0
## 3                    0                            0                  0
## 4                    0                            0                  0
## 5                    0                            0                 91
## 6                    0                            0                 43
## 7                    0                            0                 34
## 8                    0                            0                  0
## 9                  262                          262                143
## 10                  NA                           NA                 NA
## 11                   1                            1                  0
## 12                   0                            0                  0
## 13                   0                            0                  0
## 14                  NA                           NA                 NA
## 15                   0                            0                  0
## 16                  50                           50                  8
## 17                  29                           29                 25
## 18                   0                            0                  3
## 19                   0                            0                255
## 20                   0                            0                  0
## 21                  NA                           NA                 NA
## 22                   0                            0                  6
## 23                   0                            0                  0
## 24                  45                           45                  0
## 25                   0                            0                 75
##    count_certs_2_4_yr count_certs_1_4_yr_students count_certs_post_bach
## 1                   0                           0                     0
## 2                   0                           0                    20
## 3                   0                           0                     9
## 4                   0                           0                     0
## 5                   0                          91                    NA
## 6                   0                          43                    NA
## 7                   0                          34                    NA
## 8                   0                           0                     0
## 9                   0                         143                    NA
## 10                 NA                          NA                    NA
## 11                  0                           0                    NA
## 12                  0                           0                     0
## 13                  0                           0                     0
## 14                 NA                          NA                    NA
## 15                  0                           0                     0
## 16                  0                           8                    NA
## 17                  0                          25                    NA
## 18                 18                          21                    NA
## 19                  0                         255                    NA
## 20                  0                           0                     0
## 21                 NA                          NA                    NA
## 22                  0                           6                    NA
## 23                  0                           0                     0
## 24                  0                           0                     5
## 25                  0                          75                    NA
##    count_certs_post_mast count_certs_post_bach_mast_students
## 1                      0                                   0
## 2                      0                                  20
## 3                      7                                  16
## 4                      0                                   0
## 5                     NA                                  NA
## 6                     NA                                  NA
## 7                     NA                                  NA
## 8                      0                                   0
## 9                     NA                                  NA
## 10                    NA                                  NA
## 11                    NA                                  NA
## 12                     0                                   0
## 13                     0                                   0
## 14                    NA                                  NA
## 15                     0                                   0
## 16                    NA                                  NA
## 17                    NA                                  NA
## 18                    NA                                  NA
## 19                    NA                                  NA
## 20                     0                                   0
## 21                    NA                                  NA
## 22                    NA                                  NA
## 23                     5                                   5
## 24                    16                                  21
## 25                    NA                                  NA

Chunk #12–Last Ten Rows of a Dataframe

# tail() works like head() but prints rows from the bottom; here, the last ten
tail(degrees, 10)
##      year                state unitid
## 1154 2019        Massachusetts 431099
## 1155 2019           New Jersey 184056
## 1156 2019 District of Columbia 492102
## 1157 2019                 Ohio 407568
## 1158 2019                 Ohio 200785
## 1159 2019        Massachusetts 165802
## 1160 2019             Virginia 459082
## 1161 2019                 Ohio 485908
## 1162 2019             Maryland 434937
## 1163 2019           New Jersey 451370
##                                               inst_name hbcu
## 1154               Jolie Hair and Beauty Academy-Ludlow   No
## 1155                 Lincoln Technical Institute-Iselin   No
## 1156 Daniel Morgan Graduate School of National Security   No
## 1157    Raphael's School of Beauty Culture Inc-Boardman   No
## 1158                           Herzing University-Akron   No
## 1159                                     Fisher College   No
## 1160  Virginia Polytechnic Institute & State University   No
## 1161                          Antioch University Online   No
## 1162             Yeshiva College of the Nations Capital   No
## 1163                            Yeshivas Be'er Yitzchok   No
##                                  inst_sector count_assoc_degrees
## 1154    Private for-profit, less-than 2-year                   0
## 1155    Private for-profit, less-than 2-year                   0
## 1156 Private not-for-profit, 4-year or above                   0
## 1157              Private for-profit, 2-year                   0
## 1158 Private not-for-profit, 4-year or above                 107
## 1159 Private not-for-profit, 4-year or above                  85
## 1160                 Public, 4-year or above                  NA
## 1161 Private not-for-profit, 4-year or above                   0
## 1162 Private not-for-profit, 4-year or above                   0
## 1163 Private not-for-profit, 4-year or above                   0
##      count_assoc_students count_bach_degrees count_bach_students
## 1154                    0                 NA                  NA
## 1155                    0                 NA                  NA
## 1156                    0                  0                   0
## 1157                    0                 NA                  NA
## 1158                  107                 24                  24
## 1159                   84                245                 244
## 1160                   NA                 NA                  NA
## 1161                    0                 32                  32
## 1162                    0                  5                   5
## 1163                    0                 20                  20
##      count_mast_degrees count_mast_students count_phd_rs_degrees
## 1154                 NA                  NA                   NA
## 1155                 NA                  NA                   NA
## 1156                 17                  17                    0
## 1157                 NA                  NA                   NA
## 1158                  0                   0                    0
## 1159                 36                  36                    0
## 1160                 NA                  NA                   NA
## 1161                  0                   0                    0
## 1162                  0                   0                    0
## 1163                  0                   0                    0
##      count_phd_pp_degrees count_phd_other_degrees count_phd_students
## 1154                   NA                      NA                 NA
## 1155                   NA                      NA                 NA
## 1156                    0                       0                  0
## 1157                   NA                      NA                 NA
## 1158                    0                       0                  0
## 1159                    0                       0                  0
## 1160                   NA                      NA                 NA
## 1161                    0                       0                  0
## 1162                    0                       0                  0
## 1163                    0                       0                  0
##      count_certs_less_yr count_certs_less_yr_students count_certs_1_2_yr
## 1154                  96                           96                 28
## 1155                   0                            0                256
## 1156                   0                            0                  0
## 1157                  23                           23                 16
## 1158                   0                            0                 19
## 1159                   0                            0                 16
## 1160                  NA                           NA                 NA
## 1161                   0                            0                  0
## 1162                   0                            0                  0
## 1163                   0                            0                  0
##      count_certs_2_4_yr count_certs_1_4_yr_students count_certs_post_bach
## 1154                  0                          28                    NA
## 1155                  0                         256                    NA
## 1156                  0                           0                     0
## 1157                 20                          36                    NA
## 1158                  0                          19                     0
## 1159                  0                          16                     0
## 1160                 NA                          NA                    NA
## 1161                  0                           0                     0
## 1162                  0                           0                     0
## 1163                  0                           0                     0
##      count_certs_post_mast count_certs_post_bach_mast_students
## 1154                    NA                                  NA
## 1155                    NA                                  NA
## 1156                     0                                   0
## 1157                    NA                                  NA
## 1158                     0                                   0
## 1159                     0                                   0
## 1160                    NA                                  NA
## 1161                     0                                   0
## 1162                     0                                   0
## 1163                     0                                   0

Chunk #13–Summary Function

# summary() prints a set of descriptive statistics for every column of the data frame
# (see the output below: minimum, quartiles, mean, maximum and a count of NAs for numeric
# columns; length, class and mode for character columns)
summary(degrees)
##       year         state               unitid        inst_name        
##  Min.   :2019   Length:1163        Min.   :128498   Length:1163       
##  1st Qu.:2019   Class :character   1st Qu.:183076   Class :character  
##  Median :2019   Mode  :character   Median :217420   Mode  :character  
##  Mean   :2019                      Mean   :281226                     
##  3rd Qu.:2019                      3rd Qu.:434884                     
##  Max.   :2019                      Max.   :494843                     
##                                                                       
##      hbcu           inst_sector        count_assoc_degrees count_assoc_students
##  Length:1163        Length:1163        Min.   :   0.0      Min.   :   0.00     
##  Class :character   Class :character   1st Qu.:   0.0      1st Qu.:   0.00     
##  Mode  :character   Mode  :character   Median :   0.0      Median :   0.00     
##                                        Mean   : 119.3      Mean   : 116.65     
##                                        3rd Qu.:  36.0      3rd Qu.:  35.75     
##                                        Max.   :5273.0      Max.   :5163.00     
##                                        NA's   :41          NA's   :41          
##  count_bach_degrees count_bach_students count_mast_degrees count_mast_students
##  Min.   :    0.00   Min.   :    0.0     Min.   :   0.0     Min.   :   0.0     
##  1st Qu.:   27.25   1st Qu.:   27.0     1st Qu.:   0.0     1st Qu.:   0.0     
##  Median :  242.00   Median :  241.5     Median :  54.0     Median :  54.0     
##  Mean   :  712.80   Mean   :  704.2     Mean   : 342.0     Mean   : 338.5     
##  3rd Qu.:  691.25   3rd Qu.:  686.5     3rd Qu.: 278.5     3rd Qu.: 278.5     
##  Max.   :11107.00   Max.   :10906.0     Max.   :8182.0     Max.   :8150.0     
##  NA's   :633        NA's   :633         NA's   :633        NA's   :633        
##  count_phd_rs_degrees count_phd_pp_degrees count_phd_other_degrees
##  Min.   :  0.00       Min.   :  0.00       Min.   :  0.0000       
##  1st Qu.:  0.00       1st Qu.:  0.00       1st Qu.:  0.0000       
##  Median :  0.00       Median :  0.00       Median :  0.0000       
##  Mean   : 26.71       Mean   : 41.51       Mean   :  0.8472       
##  3rd Qu.:  3.75       3rd Qu.:  7.00       3rd Qu.:  0.0000       
##  Max.   :886.00       Max.   :845.00       Max.   :237.0000       
##  NA's   :633          NA's   :633          NA's   :633            
##  count_phd_students count_certs_less_yr count_certs_less_yr_students
##  Min.   :   0.00    Min.   :   0.00     Min.   :   0.00             
##  1st Qu.:   0.00    1st Qu.:   0.00     1st Qu.:   0.00             
##  Median :   0.00    Median :   0.00     Median :   0.00             
##  Mean   :  68.95    Mean   :  45.98     Mean   :  42.14             
##  3rd Qu.:  28.75    3rd Qu.:  29.00     3rd Qu.:  29.00             
##  Max.   :1687.00    Max.   :5156.00     Max.   :3560.00             
##  NA's   :633        NA's   :41          NA's   :41                  
##  count_certs_1_2_yr count_certs_2_4_yr count_certs_1_4_yr_students
##  Min.   :   0.00    Min.   :  0.000    Min.   :   0.00            
##  1st Qu.:   0.00    1st Qu.:  0.000    1st Qu.:   0.00            
##  Median :   1.00    Median :  0.000    Median :   6.00            
##  Mean   :  44.69    Mean   :  3.583    Mean   :  47.39            
##  3rd Qu.:  47.75    3rd Qu.:  0.000    3rd Qu.:  51.00            
##  Max.   :1576.00    Max.   :437.000    Max.   :1575.00            
##  NA's   :41         NA's   :41         NA's   :41                 
##  count_certs_post_bach count_certs_post_mast
##  Min.   :  0.00        Min.   :  0.000      
##  1st Qu.:  0.00        1st Qu.:  0.000      
##  Median :  0.00        Median :  0.000      
##  Mean   : 23.01        Mean   :  6.889      
##  3rd Qu.:  7.00        3rd Qu.:  0.000      
##  Max.   :816.00        Max.   :333.000      
##  NA's   :633           NA's   :633          
##  count_certs_post_bach_mast_students
##  Min.   :  0.00                     
##  1st Qu.:  0.00                     
##  Median :  0.00                     
##  Mean   : 29.05                     
##  3rd Qu.: 14.75                     
##  Max.   :815.00                     
##  NA's   :633

Chunk #14–Structure Function–What do your data look like?

# str() displays the structure of the data frame: the number of observations and variables,
# followed by each variable's name, type (int, chr, ...) and first few values
str(degrees)
## 'data.frame':    1163 obs. of  24 variables:
##  $ year                               : int  2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
##  $ state                              : chr  "Maryland" "Virginia" "Virginia" "Maryland" ...
##  $ unitid                             : int  491631 234155 231624 163912 200590 479062 444291 450298 232919 457891 ...
##  $ inst_name                          : chr  "Women's Institute of Torah Seminary" "Virginia State University" "William & Mary" "St. Mary's College of Maryland" ...
##  $ hbcu                               : chr  "No" "Yes" "No" "No" ...
##  $ inst_sector                        : chr  "Private not-for-profit, 4-year or above" "Public, 4-year or above" "Public, 4-year or above" "Public, 4-year or above" ...
##  $ count_assoc_degrees                : int  0 0 0 0 11 0 0 13 0 NA ...
##  $ count_assoc_students               : int  0 0 0 0 11 0 0 13 0 NA ...
##  $ count_bach_degrees                 : int  45 672 1653 392 NA NA NA 31 NA NA ...
##  $ count_bach_students                : int  45 672 1653 392 NA NA NA 31 NA NA ...
##  $ count_mast_degrees                 : int  0 113 737 29 NA NA NA 12 NA NA ...
##  $ count_mast_students                : int  0 113 734 29 NA NA NA 12 NA NA ...
##  $ count_phd_rs_degrees               : int  0 19 85 0 NA NA NA 0 NA NA ...
##  $ count_phd_pp_degrees               : int  0 0 230 0 NA NA NA 0 NA NA ...
##  $ count_phd_other_degrees            : int  0 0 0 0 NA NA NA 0 NA NA ...
##  $ count_phd_students                 : int  0 19 315 0 NA NA NA 0 NA NA ...
##  $ count_certs_less_yr                : int  0 0 0 0 0 0 0 0 262 NA ...
##  $ count_certs_less_yr_students       : int  0 0 0 0 0 0 0 0 262 NA ...
##  $ count_certs_1_2_yr                 : int  0 0 0 0 91 43 34 0 143 NA ...
##  $ count_certs_2_4_yr                 : int  0 0 0 0 0 0 0 0 0 NA ...
##  $ count_certs_1_4_yr_students        : int  0 0 0 0 91 43 34 0 143 NA ...
##  $ count_certs_post_bach              : int  0 20 9 0 NA NA NA 0 NA NA ...
##  $ count_certs_post_mast              : int  0 0 7 0 NA NA NA 0 NA NA ...
##  $ count_certs_post_bach_mast_students: int  0 20 16 0 NA NA NA 0 NA NA ...

Performing more functions on the data frame

Chunk #15–Sorting a Data Frame

# Sort the data frame by state and then by institution name
degrees <- degrees %>%
  arrange(state, inst_name)

# Descending order example: sort by state, then by the count of bachelor's degrees
# from highest to lowest (desc() reverses the sort for that column)
degrees_bach <- degrees %>%
  arrange(state, desc(count_bach_degrees))

# Remove the example data frame from the environment
rm(degrees_bach)

Chunk #16–Filtering a Data Frame

# Keep only the institutions located in New Jersey
new_jersey <- degrees %>% # start from the "degrees" data frame
  filter(state == "New Jersey")

# Multiple conditions can be combined in one filter() call
# HBCU institutions in Virginia
virginia_hbcu <- degrees %>%
  filter(state == "Virginia" & hbcu == "Yes")

# Virginia institutions whose hbcu value is anything other than "Yes"
virginia_non_hbcu <- degrees %>%
  filter(state == "Virginia" & hbcu != "Yes")

# Massachusetts institutions with only the student counts and the institution's main info:
# filter the rows first, then select the variables you want to keep
mass_student_count <- degrees %>%
  filter(state == "Massachusetts") %>%
  select(
    unitid, inst_name, hbcu, inst_sector,
    count_assoc_students, count_bach_students, count_mast_students,
    count_phd_students, count_certs_less_yr_students,
    count_certs_1_4_yr_students, count_certs_post_bach_mast_students
  )

# Numbers are filtered the same way, but without quotation marks around the value
assoc_deg <- degrees %>%
  filter(count_assoc_degrees >= 1000) %>% # 1,000 or more associate's degrees
  select(
    unitid, inst_name, state, hbcu, inst_sector,
    count_assoc_degrees, count_assoc_students
  ) %>% # keep only these variables
  arrange(desc(count_assoc_degrees)) # finally, sort in descending order by "count_assoc_degrees"

rm(new_jersey, virginia_hbcu, virginia_non_hbcu, mass_student_count, assoc_deg)

Chunk #17–Creating New Variables

# Build a data frame for PhDs using the OR operator (|).
# A row of "degrees" is kept when any one of the three PhD degree counts is greater than zero.
phd_degrees <- degrees %>%
  filter(
    count_phd_rs_degrees > 0 |
      count_phd_pp_degrees > 0 |
      count_phd_other_degrees > 0
  ) %>%
  select(
    unitid, inst_name, state, hbcu, inst_sector,
    count_phd_rs_degrees, count_phd_pp_degrees, count_phd_other_degrees
  )

# General pattern for creating a new variable:

## df_name$new_variable_name <- df_name$variable_1 + df_name$variable_2


# New variable--Total PhDs (the three PhD degree counts added together)
phd_degrees$degrees_total <-
  phd_degrees$count_phd_rs_degrees +
  phd_degrees$count_phd_pp_degrees +
  phd_degrees$count_phd_other_degrees

# Share of PhDs that were research and scholarly
# Note: / divides (* multiplies, - subtracts)
phd_degrees$pct_phd_rs <-
  phd_degrees$count_phd_rs_degrees / phd_degrees$degrees_total

# What do you notice about this new variable?
# Can we round it?
# Of course!

# Rounding a variable that already exists:
phd_degrees$pct_phd_rs <- round(phd_degrees$pct_phd_rs, digits = 3)

# Calculating the percentage and rounding it in one step:
phd_degrees$pct_phd_rs <- round(
  phd_degrees$count_phd_rs_degrees / phd_degrees$degrees_total,
  digits = 3
)


# Need the value to look like a percent in a report?
# percent() is a user-defined helper that multiplies x by 100, formats it with formatC()
# (fixed notation and 1 decimal place by default; extra arguments in ... are passed on to
# formatC()) and appends a "%" sign. The result is a character vector.
percent <- function(x, digits = 1, format = "f", ...) {
  scaled <- x * 100
  formatted <- formatC(scaled, format = format, digits = digits, ...)
  paste0(formatted, "%")
}

# Note the _char at the end of this new variable. Once a variable is formatted like this it is
# stored as text (chr), so it is no longer treated as a number.
#
# Lesson from the presentation: the first version of this line failed because it used the wrong
# name for the total. The total variable created earlier is called "degrees_total". It was
# originally going to be the longer "count_phd_degrees_total", but the name was shortened and this
# line was not updated to match. Learn from this mistake: when you rename a variable, update every
# line that uses it.
# The incorrect line is kept below, commented out, for reference ("count_phd_degrees_total" is not
# a column of phd_degrees):
# phd_degrees$pct_phd_rs_char <- percent(round((phd_degrees$count_phd_rs_degrees / phd_degrees$count_phd_degrees_total), digits=3))

# The correct line, using "degrees_total":
phd_degrees$pct_phd_rs_char <- percent(round(phd_degrees$count_phd_rs_degrees / phd_degrees$degrees_total, digits = 3))

# You won't be able to conduct any math operations on character variables
str(phd_degrees)
## 'data.frame':    206 obs. of  11 variables:
##  $ unitid                 : int  128771 129242 129491 130226 130253 130493 128744 129020 436827 463056 ...
##  $ inst_name              : chr  "Central Connecticut State University" "Fairfield University" "Hartford Seminary" "Quinnipiac University" ...
##  $ state                  : chr  "Connecticut" "Connecticut" "Connecticut" "Connecticut" ...
##  $ hbcu                   : chr  "No" "No" "No" "No" ...
##  $ inst_sector            : chr  "Public, 4-year or above" "Private not-for-profit, 4-year or above" "Private not-for-profit, 4-year or above" "Private not-for-profit, 4-year or above" ...
##  $ count_phd_rs_degrees   : int  5 0 0 0 0 14 19 345 4 7 ...
##  $ count_phd_pp_degrees   : int  5 31 0 311 71 0 61 366 4 8 ...
##  $ count_phd_other_degrees: int  0 0 5 0 0 0 0 0 0 0 ...
##  $ degrees_total          : int  10 31 5 311 71 14 80 711 8 15 ...
##  $ pct_phd_rs             : num  0.5 0 0 0 0 1 0.238 0.485 0.5 0.467 ...
##  $ pct_phd_rs_char        : chr  "50.0%" "0.0%" "0.0%" "0.0%" ...

Chunk #17b–Will there be rounding errors?

## Someone asked about the possibility of rounding errors.
## To test this, all of the variables are created in a single mutate() call:
##   1. three percent variables rounded to 2 digits (the *_round columns)
##   2. the same three percent variables without any rounding
##   3. test1 = the rounded variables added together
##      test2 = the non-rounded variables added together
phd_degrees <- phd_degrees %>%
  mutate(
    pct_phd_rs_round = round(count_phd_rs_degrees / degrees_total, digits = 2),
    pct_phd_pp_round = round(count_phd_pp_degrees / degrees_total, digits = 2),
    pct_phd_other_round = round(count_phd_other_degrees / degrees_total, digits = 2),
    pct_phd_rs = count_phd_rs_degrees / degrees_total,
    pct_phd_pp = count_phd_pp_degrees / degrees_total,
    pct_phd_other = count_phd_other_degrees / degrees_total,
    test1 = pct_phd_rs_round + pct_phd_pp_round + pct_phd_other_round,
    test2 = pct_phd_rs + pct_phd_pp + pct_phd_other
  )


# we can see with both variables that they all add up to 1.00 (for test1) and 1 (for test2)


rm(phd_degrees)

Chunk #18–Grouping a Data Frame

# Summarising by group with dplyr's summarise()

# A record count of the institutions in each state
count_by_state <- degrees %>%
  group_by(state) %>%
  summarise(inst_count = n())

# More summary functions can be added to the same call
count_by_state_assoc_deg <- degrees %>%
  group_by(state) %>%
  summarise(
    inst_count = n(),
    sum_assoc_degrees = sum(count_assoc_degrees)
  )

# Look at this data frame--what's wrong with it?

# Why were DC, DE and WV the only states with totals?
test <- degrees %>%
  filter(state %in% c("West Virginia", "District of Columbia", "Delaware"))

# For whatever reason, WV, DC & DE had 0s listed in the data frame and that's why the sum function could work

# Two ways to fix this
# 1- within the specific summary, by telling sum() to ignore NAs:
count_by_state_assoc_deg <- degrees %>%
  group_by(state) %>%
  summarise(
    inst_count = n(),
    sum_assoc_degrees = sum(count_assoc_degrees, na.rm = TRUE)
  )

# 2- by getting rid of the NAs in the data frame itself
# replacing NAs in just one column
degrees$count_assoc_degrees[is.na(degrees$count_assoc_degrees)] <- 0

# replacing NAs--whole dataframe
degrees[is.na(degrees)] <- 0

# With 0s in place of every NA in the "degrees" dataframe, na.rm is no longer needed
count_by_state_assoc_bach_deg <- degrees %>%
  group_by(state) %>%
  summarise(
    inst_count = n(),
    sum_assoc_degrees = sum(count_assoc_degrees),
    sum_bach_degrees = sum(count_bach_degrees)
  )

# What if you want sums of all the numeric variables?
# spoiler--this will cause an error
total_by_state <- degrees %>%
  group_by(state) %>%
  summarise_each(funs(sum))
## Warning: `summarise_each_()` is deprecated as of dplyr 0.7.0.
## Please use `across()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Error: Problem with `summarise()` input `inst_name`.
## x invalid 'type' (character) of argument
## i Input `inst_name` is `sum(inst_name)`.
## i The error occured in group 1: state = "Connecticut".

Our reaction:

Chunk #19–Grouping a Data Frame, Take 2

# This says: summarise a variable only if it is numeric.
# across(where(is.numeric), sum) is the current dplyr replacement for summarise_if(is.numeric, sum).
total_by_state <- degrees %>%
  group_by(state) %>%
  summarise(across(where(is.numeric), sum))

# This summed up Unit ID and Year, which isn't necessary or helpful.
# The code below changes 'unitid' and 'year' from numerics to characters so they are skipped.
# as.character() is applied to the columns directly: sending the values through as.numeric()
# first can turn round numbers into scientific notation (as.character(200000) gives "2e+05").
degrees$unitid <- as.character(degrees$unitid)
degrees$year <- as.character(degrees$year)

# Let's try this again
total_by_state_v2 <- degrees %>%
  group_by(state) %>%
  summarise(across(where(is.numeric), sum))

rm(count_by_state, count_by_state_assoc_deg, test, total_by_state, total_by_state_v2)

Chunk #20–Extracting the lowest value by group

# Extracting the row with the minimum of a field within each group:
# which.min() gives the position of the first smallest value and slice() keeps that row
lowest_masters_count <- degrees %>% 
  group_by(state) %>% 
  slice(which.min(count_mast_degrees))

# All that code did was include the first institution listed with a value of 0. Not very helpful. 

# Options
# 1- We could create a new data frame where the "count_mast_degrees" is greater than 0 and use that data frame 
masters_deg <- degrees %>% filter(count_mast_degrees > 0) 

lowest_masters_count_option_1 <- masters_deg %>% 
  group_by(state) %>% 
  slice(which.min(count_mast_degrees)) 

# 2- We could do the filtering inside the same pipeline. The order of the commands matters:
# the filter needs to be above the slicing so the 0s are removed before the minimum is picked
lowest_masters_count_option_2 <- degrees %>% 
  group_by(state) %>% 
  filter(count_mast_degrees > 0) %>% 
  slice(which.min(count_mast_degrees))

# slice(which.max), as you can imagine, gives you the maximum value

rm(lowest_masters_count, masters_deg, lowest_masters_count_option_1, lowest_masters_count_option_2)

Chunk #21–Merging two Data Frames together

Data were extracted from the “name-abbr.csv” file linked at the bottom of this site: https://worldpopulationreview.com/states/state-abbreviations

# Goal: merge the state name in 'degrees' with its state abbreviation

# This example gets data from an online source and reads it into our R environment without having to download it

# Data extracted from: https://worldpopulationreview.com/states/state-abbreviations 
states <- read.csv(
  url("https://worldpopulationreview.com/static/states/name-abbr.csv")
)
# What's wrong with this data frame?
head(states, n = 3)
##    Alabama AL
## 1   Alaska AK
## 2  Arizona AZ
## 3 Arkansas AR
# Reading the file again with header = FALSE so the first line is kept as data
states_v2 <- read.csv(
  url("https://worldpopulationreview.com/static/states/name-abbr.csv"),
  header = FALSE
)
# What's wrong with this data frame?
head(states_v2, n = 2)
##        V1 V2
## 1 Alabama AL
## 2  Alaska AK
# the good news is we now have ALL states in the data frame but the bad news is the column names aren't meaningful
colnames(states_v2) <- c("state_name", "state_abb")
head(states_v2, n = 2)
##   state_name state_abb
## 1    Alabama        AL
## 2     Alaska        AK
# yay! The data are in the format we need!

# Merging the two data sets together: "state" in degrees is matched to "state_name" in states_v2,
# and all.x = TRUE keeps every row of degrees
degrees <- merge(
  x = degrees,
  y = states_v2,
  by.x = "state",
  by.y = "state_name",
  all.x = TRUE
)

# Note: the order of your variables has changed. You may want/need to rearrange them

rm(states, states_v2)

More on Loading Data into R
Cookbook for R is a really good source of R Help

Chunk #22–Data Cleaning

# 1- list each unique value of inst_sector once
sectors <- degrees %>%
  distinct(inst_sector)
sectors
##                                inst_sector
## 1     Private for-profit, less-than 2-year
## 2                 Public, less-than 2-year
## 3                           Public, 2-year
## 4  Private not-for-profit, 4-year or above
## 5               Private for-profit, 2-year
## 6      Private for-profit, 4-year or above
## 7                  Public, 4-year or above
## 8           Private not-for-profit, 2-year
## 9 Private not-for-profit, less-than 2-year

Chunk #22 (Cont’d)

# Let's say you wanted to divide the "sector" variable into three sections
# 1--public or private
# 2--for-profit or not-for-profit
# 3--years
# We'll be doing R's version of Excel's Text to Columns, but first we have to get rid of the spaces
# between the parts of inst_sector and put a "/" between them instead

# R's ifelse

# Let's start with the semantics of an ifelse statement

# data_frame$new_variable <- ifelse("this condition is met", "option1 if condition met", "option2 if condition is not met")


degrees$example <- ifelse(degrees$count_bach_degrees > 0, "Bachelor Degrees Awarded", "No Bachelor Degrees Awarded")


# Here is an example with many different options.
# ifelse() statements can be nested inside each other, but with this many options that gets hard
# to read. dplyr's case_when() checks the conditions from top to bottom and uses the first one
# that is TRUE. All nine sectors are listed explicitly; a value that matches none of them becomes
# NA instead of being silently labelled as a public, less-than-2-year institution.
degrees$inst_sector_rev <- case_when(
  degrees$inst_sector == "Private for-profit, 2-year" ~ "Private/for_profit/2_yr",
  degrees$inst_sector == "Private for-profit, 4-year or above" ~ "Private/for_profit/4_yr",
  degrees$inst_sector == "Private for-profit, less-than 2-year" ~ "Private/for_profit/less_than_2_yr",
  degrees$inst_sector == "Private not-for-profit, 2-year" ~ "Private/not_for_profit/2_yr",
  degrees$inst_sector == "Private not-for-profit, 4-year or above" ~ "Private/not_for_profit/4_yr",
  degrees$inst_sector == "Private not-for-profit, less-than 2-year" ~ "Private/not_for_profit/less_than_2_yr",
  degrees$inst_sector == "Public, 2-year" ~ "Public/not_for_profit/2_yr",
  degrees$inst_sector == "Public, 4-year or above" ~ "Public/not_for_profit/4_yr",
  degrees$inst_sector == "Public, less-than 2-year" ~ "Public/not_for_profit/less_than_2_yr",
  TRUE ~ NA_character_
)


# we're duplicating this field so you can see the whole picture at the end
degrees$inst_sector_rev_2 <- degrees$inst_sector_rev


# This is R's version of Excel's Text To Columns feature: inst_sector_rev_2 is split at each "/"
# into three new columns.
# NOTE(review): "inst_sector" is also the name of a column that already exists in degrees; the new
# "inst_sector" piece presumably replaces the original column -- confirm this is what you want.
degrees <- separate(data = degrees, col = inst_sector_rev_2, into = c("inst_sector", "inst_type", "inst_level"), sep = "\\/")

Chunk #23 Writing a file to a csv

# To write a data frame back out as a csv file, run the code below.
# Only a file name is given, so the file will be placed in your working directory.
write.csv(
  degrees,
  file = "degrees_rev.csv",
  row.names = FALSE
)